Initialize: load libraries and custom functions
setwd("~/google_drive/ImmgenT/jamboree2/immgent_jamboree2_git/")
libs = c("Seurat", "ggplot2", "viridis", "pheatmap", "reshape2", "dplyr")
sapply(libs, function(x) suppressMessages(suppressWarnings(library(x, character.only = TRUE, quietly = T, warn.conflicts = F))))
## $Seurat
## [1] "Seurat" "SeuratObject" "sp" "stats" "graphics"
## [6] "grDevices" "utils" "datasets" "methods" "base"
##
## $ggplot2
## [1] "ggplot2" "Seurat" "SeuratObject" "sp" "stats"
## [6] "graphics" "grDevices" "utils" "datasets" "methods"
## [11] "base"
##
## $viridis
## [1] "viridis" "viridisLite" "ggplot2" "Seurat" "SeuratObject"
## [6] "sp" "stats" "graphics" "grDevices" "utils"
## [11] "datasets" "methods" "base"
##
## $pheatmap
## [1] "pheatmap" "viridis" "viridisLite" "ggplot2" "Seurat"
## [6] "SeuratObject" "sp" "stats" "graphics" "grDevices"
## [11] "utils" "datasets" "methods" "base"
##
## $reshape2
## [1] "reshape2" "pheatmap" "viridis" "viridisLite" "ggplot2"
## [6] "Seurat" "SeuratObject" "sp" "stats" "graphics"
## [11] "grDevices" "utils" "datasets" "methods" "base"
##
## $dplyr
## [1] "dplyr" "reshape2" "pheatmap" "viridis" "viridisLite"
## [6] "ggplot2" "Seurat" "SeuratObject" "sp" "stats"
## [11] "graphics" "grDevices" "utils" "datasets" "methods"
## [16] "base"
#options(Seurat.object.assay.version = 'v5') #if using V5
#mypal = c('lightgrey','blue','green','orange','red','yellow','lightsalmon','orchid4','pink3','gold4', 'yellowgreen','cyan4','brown','thistle3','tomato3','orange2','mediumpurple1')
#Large color palette:
library("pals")
##
## Attaching package: 'pals'
## The following objects are masked from 'package:viridis':
##
## cividis, inferno, magma, plasma, turbo, viridis
## The following objects are masked from 'package:viridisLite':
##
## cividis, inferno, magma, plasma, turbo, viridis
library("RColorBrewer")
n = 70
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
mypal1 = unique(unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals))))
mypal1 = mypal1[-4]
parade = function(n) { return(Seurat::DiscretePalette(n, palette = "parade", shuffle = F)) }
length(glasbey())
## [1] 32
length(polychrome())
## [1] 36
mypal = c(glasbey(), polychrome(), mypal1)
names(mypal) = NULL
Transfer metadata from your previous object to the integrated
# Load in your original small dataset
orig_sc = readRDS('dataset_clean.Rds') #~/google_drive/ImmgenT/exp_id/20220912_exp_id_16_CBDM_scurfy/IGT24/dataset_clean.Rds
orig_sc$orig_RNA_clusters = orig_sc$RNA_clusters
# Load in the Integrated dataset
integrated_sc = readRDS('IGT24_withsketch_SeuratV4.Rds')
orig_sc$RNA_clusters = as.character(orig_sc$RNA_clusters)
integrated_sc$orig_RNA_clusters = NA
all(colnames(integrated_sc)[match(colnames(orig_sc), colnames(integrated_sc))] == colnames(orig_sc))
## [1] TRUE
integrated_sc$orig_RNA_clusters[match(colnames(orig_sc), colnames(integrated_sc))] = orig_sc$RNA_clusters
DimPlot(integrated_sc, reduction = "umap_totalvi", group.by = "orig_RNA_clusters", order = TRUE, cols = mypal, label = T, label.box = F)
DimPlot(integrated_sc, reduction = "umap_totalvi", group.by = "ClusterTOTALVI_Res1", order = TRUE, cols = mypal, label = T, label.box = F)
Compare my clusters to the integrated clusters
integrated_sc$ClusterTOTALVI_Res1 = factor(integrated_sc$ClusterTOTALVI_Res1, levels = as.character(c(0:(length(unique(integrated_sc$ClusterTOTALVI_Res1))-1))))
integrated_sc$orig_RNA_clusters = factor(integrated_sc$orig_RNA_clusters, levels = as.character(c(1:(length(unique(integrated_sc$orig_RNA_clusters))-1))))
#Subset your object to keep only your data
so = integrated_sc[,integrated_sc$IGT == "IGT24"]
so$ClusterTOTALVI_Res1 = factor(so$ClusterTOTALVI_Res1, levels = as.character(c(0:(length(unique(so$ClusterTOTALVI_Res1))-1))))
so$orig_RNA_clusters = factor(so$orig_RNA_clusters, levels = as.character(c(1:(length(unique(so$orig_RNA_clusters))-1))))
tmp = table(so$orig_RNA_clusters, so$ClusterTOTALVI_Res1)
tmp3 = tmp / rowSums(tmp)
rowSums(tmp3)
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
ColorRamp = rev(viridis(100))
tmp3[tmp3==0] = NA
labels_matrix = ifelse(is.na(tmp3), yes = "", no = as.character(signif(tmp3,2)*100))
#labels_matrix = matrix(labels_matrix, nrow(data_matrix), ncol(data_matrix))
pheatmap(mat = signif(tmp3,2)*100, cluster_rows = F, cluster_cols = F, display_numbers = labels_matrix, number_format = "%.0f", breaks = seq(0,100,length.out = length(ColorRamp)+1), col = ColorRamp, fontsize_row = 10, fontsize_col = 10, fontsize = 15, main = "Percent of cells of my original cluster (rows) in the integrated clusters (cols)", na_col = "#FDE725")
Distribution of the integrated clusters in each sample
#Subset your object to keep only your data
so = integrated_sc[,integrated_sc$IGT == "IGT24"]
df = data.frame(cluster = factor(sprintf("cl%s",so@meta.data[,"ClusterTOTALVI_Res1"]), levels = sprintf("cl%s",as.character(c(0:(length(unique(so@meta.data[,"ClusterTOTALVI_Res1"]))-1))))),
sample = factor(so$sample_name.1, levels = unique(so$sample_name.1)) )
tmp = table(df$cluster, df$sample)
tmp2 = t(t(tmp) / colSums(tmp)) # normalize the number of cells per sample
colSums(tmp2)
## WT D19 SPL WT SPL D3 WT SPL D12
## 1 1 1
## Foxp3.p327fs SPL D 19 Foxp3.p327fs SPL D3 WT LUNG D19
## 1 1 1
## Foxp3.p327fs LUNG D19 Foxp3.p327fs COL D19 WT COL D19
## 1 1 1
## Foxp3.p327fs SPL D12
## 1
tmp3 = tmp2
rowSums(tmp3)
## cl0 cl1 cl2 cl3 cl4 cl5
## 2.1614125149 0.8319734045 0.0563947200 0.0314667426 0.8707558050 0.9002874204
## cl6 cl7 cl8 cl9 cl10 cl11
## 0.1297862696 0.3556738954 0.0482725344 0.5294629974 0.3371560254 0.2254185987
## cl12 cl13 cl14 cl15 cl16 cl17
## 0.1570297517 0.0500024601 0.4828918076 0.0229568930 0.0446420424 0.3774682112
## cl18 cl19 cl20 cl21 cl22 cl23
## 0.0236105046 0.1969946263 0.1737327810 0.0958588733 0.1450695312 0.0962772443
## cl24 cl25 cl26 cl27 cl28 cl29
## 0.0678630670 0.1113715208 0.1306584938 0.4067810067 0.0174082896 0.2459507887
## cl30 cl31 cl32 cl33 cl34 cl35
## 0.1226396666 0.0404857035 0.0009372071 0.4154116668 0.0000000000 0.0000000000
## cl36 cl37 cl38 cl39 cl40 cl41
## 0.0086509489 0.0009372071 0.0271027158 0.0019486269 0.0118912304 0.0000000000
## cl42 cl43 cl44 cl45 cl46 cl47
## 0.0101647809 0.0124234179 0.0000000000 0.0090726749 0.0082224855 0.0054828460
tmp4 = melt(tmp3)
head(tmp4)
## Var1 Var2 value
## 1 cl0 WT D19 SPL 0.369070209
## 2 cl1 WT D19 SPL 0.145161290
## 3 cl2 WT D19 SPL 0.000000000
## 4 cl3 WT D19 SPL 0.000000000
## 5 cl4 WT D19 SPL 0.055977230
## 6 cl5 WT D19 SPL 0.003795066
tmp4$sample = df$sample[match(tmp4$Var2, df$sample)]
#pdf(file = sprintf("%s_SampleComposition_%s.pdf", prefix, res), width =30, height =20)
ColorRamp = rev(viridis(100))
tmp3[tmp3==0] = NA
labels_matrix = ifelse(is.na(tmp3), yes = "", no = as.character(signif(tmp3,2)*100))
#labels_matrix = matrix(labels_matrix, nrow(data_matrix), ncol(data_matrix))
pheatmap(mat = signif(tmp3,2)*100, cluster_rows = F, cluster_cols = F, display_numbers = labels_matrix, number_format = "%.0f", breaks = seq(0,100,length.out = length(ColorRamp)+1), col = ColorRamp, fontsize_row = 10, fontsize_col = 10, fontsize = 15, main = "Percent of cells in each sample", na_col = "#FDE725")
ggplot(tmp4) + geom_point(aes(Var1, value, color = sample), size = I(3), alpha = 0.7) + scale_color_manual(values = mypal) + theme_bw()+ ggtitle(label = "Cluster composition in each Sample") + theme(axis.text.x = element_text(angle=75, vjust=0.5, size=10)) + facet_wrap(~sample) + NoLegend()
#dev.off()
Plot genes and proteins
#normalize the ADT and RNA data before plotting
so = NormalizeData(so,assay = "ADT", normalization.method = "CLR", verbose = T)
## Normalizing across features
so = NormalizeData(so,assay = "RNA", normalization.method = "LogNormalize", verbose = T)
NormalizeData(integrated_sc,assay = "ADT", normalization.method = "CLR", verbose = T)
## Normalizing across features
## An object of class Seurat
## 55667 features across 29928 samples within 2 assays
## Active assay: RNA (55487 features, 0 variable features)
## 2 layers present: counts, data
## 1 other assay present: ADT
## 1 dimensional reduction calculated: umap_totalvi
DefaultAssay(integrated_sc) = "ADT"
FeaturePlot(integrated_sc, features = "CD62L", raster = T, slot = "data")
FeaturePlot(integrated_sc, features = "TCRGD", raster = T, slot = "data")
Gene expression differences
library("EnhancedVolcano")
## Loading required package: ggrepel
Idents(so) = so$IGT
markers = FindMarkers(so, ident.1 = "10", ident.2 = "25", group.by = "ClusterTOTALVI_Res1", subset.ident = "IGT24")
EnhancedVolcano(markers, lab = rownames(markers), x = 'avg_log2FC', y = 'p_val', subtitle = "", title = "10 vs 25 in IGT24")
markers = FindMarkers(so, ident.1 = "10", ident.2 = "0", group.by = "ClusterTOTALVI_Res1", subset.ident = "IGT24")
EnhancedVolcano(markers, lab = rownames(markers), x = 'avg_log2FC', y = 'p_val', subtitle = "", title = "10 vs 0 in IGT24")
integrated_sc$clustertest = "others"
integrated_sc$clustertest[integrated_sc$ClusterTOTALVI_Res1 %in% c("40", "46", "26", "24", "19")] = "BottomRight"
Idents(integrated_sc) = integrated_sc$clustertest
DefaultAssay(integrated_sc) = "RNA"
tmp = integrated_sc[,integrated_sc$IGT != "IGT24"]
markers = FindMarkers(tmp, ident.1 = "BottomRight", group.by = "clustertest")
EnhancedVolcano(markers, lab = rownames(markers), x = 'avg_log2FC', y = 'p_val', subtitle = "", title = "BottomRight")
## Warning: One or more p-values is 0. Converting to 10^-1 * current lowest
## non-zero p-value...
tmp = NormalizeData(tmp,assay = "RNA", normalization.method = "LogNormalize", verbose = T)
FeaturePlot(tmp, features = "Tox", raster = T)
FeaturePlot(tmp, features = "Dock2", raster = T)